#!/bin/bash
#This script calculates the Gini index from a relates_ask file and also outputs the points on the Gini curve.

#the input file format is as follows:
#id1 id2,id3,id4,id4
#id2 id4,id5,id6

#the output file format is as follows:
#1 0
#2 0
#3 0
# Driver: chains the helper scripts that live next to this file to turn a
# relation file into the Gini result file.
#
# Arguments:
#   $1 - input relation file (one "id1 id2,id3,id4" record per line)
#   $2 - output file, handed to calGiniIndex.sh
# Exit status:
#   0 on success, 1 on bad usage / unreadable input / temp-dir failure,
#   otherwise the status of the first pipeline stage that failed.
if [ $# -lt 2 ]; then
    echo "Usage: ./getGiniIndex.sh relates_ask.txt giniRetFile";
    echo "input File structure:"
    echo "id1 id2,id3,id4"
    echo "NOTICE: giniRetFile will be delete firstly"
    exit 1
fi
srcFile=$1
dstFile=$2

# The input is read by two separate stages, so fail early with a clear
# message instead of letting the helpers run on a missing file.
if [ ! -r "$srcFile" ]; then
    echo "getGiniIndex: cannot read input file: $srcFile" >&2
    exit 1
fi

# The helper scripts are resolved relative to this script's own location.
baseDir=$(dirname -- "$0")

# Intermediate files keep their original base names (tagged with the output
# file's basename) but live in a private temporary directory, so concurrent
# runs cannot clobber each other and nothing is left in the working directory.
suffix=$(basename -- "$dstFile")
workDir=$(mktemp -d) || {
    echo "getGiniIndex: cannot create temporary directory" >&2
    exit 1
}
# Remove the intermediates on every exit path, including failures and signals.
trap 'rm -rf -- "$workDir"' EXIT

in_link_count_tmp="$workDir/in_link_count_tmp_$suffix"
in_link_count_fillZero="$workDir/in_link_count_fillZero_$suffix"
in_link_count_fillZero_sort="$workDir/in_link_count_fillZero_sort_$suffix"

# Stage 1: in-link statistics for all the questions.
"$baseDir/stat_in_link.sh" "$srcFile" > "$in_link_count_tmp" || exit $?

# Stage 2: add the questions that have zero in-links to the statistics.
"$baseDir/fillZeroToLinkCount.sh" "$in_link_count_tmp" "$srcFile" \
    > "$in_link_count_fillZero" || exit $?

# Stage 3: order the records numerically by their second field.
sort -k2n -- "$in_link_count_fillZero" > "$in_link_count_fillZero_sort" || exit $?

# Stage 4: compute the Gini index from the sorted counts into the output file.
"$baseDir/calGiniIndex.sh" "$in_link_count_fillZero_sort" "$dstFile" || exit $?
